/* APPENDIX 3 */



/** 

* IPS rule processing functions 

* @file rules.h 

* ©author jmccaskey 
*/ 

#ifndef RULES H 

#define RULES H 

r 

* Function to process all the rules related to a single monitor. 

* The function will select all the rules for the monitor and then evaluate them in turn. 

* It will insert any neccesary errorjog entries and update escalations as it goes. 
V 

int process_rules(monitor *mon, long double gauge, MYSQL *mysql_connection); 

r 

* Function to adjust timestamp to users timezone from UTC timestamp. 

* The function should return 0 for sucess 1 for failure. 
7 

int timezone_adjust( struct tm **tm_struct, int timezone_offset, int daylight); 

r 

* Function to test whether a rule (as represented by a MYSQL_ROW object) is active for the specified 

* tm structure (which should be the polling time in the users timezone). Function returns 1 if the rule 

* passed the check, 0 otherwise. 
V 

int test_date(struct tm *tm_struct, MYSQL_ROW row); 
/** 

* Function to format a value to approriate unit increment. It is assumed that the value coming in 

* is already in the base unit (ie bps, or packets, not kbps or (thou)packets). adjusted_unit will 

* be dynamically allocated and the caller must free it at a later point. 
V 

long double format_value(char *adjusted_unit, long double old_value, char *old_unit); 
/** 

* Function to test whether a static rule is violated. 
*/ 

void test_static_threshold(unsigned int rulejd, unsigned int rule_serverjd, monitor *mon, 
char *operator, long double value, long double pollj/alue); 

/** 

* Function to test whether a variable rule is violated. 
*/ 

void test_variable_threshold(int rulejd, int rule_serverjd, monitor *mon, 

char 'operator, long double value, long double polLvalue, char *value Jype, 
int time_value, char *time_unit, MYSQL *mysql_connection); 

/** 

* Function to test whether a cumulative rule is violated. 
7 

void test_cumulative_threshold(int rulejd, int rule_serverjd, monitor *mon, 

char *operator, long double value, long double poll_value, int percent, 
int time_value, char *time_unit, MYSQL *mysql_connection); 



* Function to test whether a stddev rule is violated. 
*/ 

void test_stddevJhreshold(int rulejd, int rule_server_id, monitor *mon f 

char *operator, long double value, long double poll_value, int time_value, 
char *time_unit, MYSQL *mysql_connection); 



#include "rules. c" 



#endif 



/**
 * IPS rule processing functions
 * @file rules.c
 * @author jmccaskey
 */

/** 

* Function to process all the rules related to a single monitor. 

* The function will select all the rules for the monitor and then evaluate them in turn. 

* It will insert any neccesary errorjog entries and update escalations as it goes. 
7 

int process_rules(monitor *mon, long double gauge, MYSQL *mysql_connection) { 
//select all the rules for the monitor 
MYSQL_RES 'result; 
MYSQL_ROW row; 
int n; 

char *sql_query; 

assert(sql_query=malloc(1000)); 

n=snprintf(sql_query, 1000, "SELECT rule.rulejd, rule.rule_serverjd, thresholdjype, 
timeframe_start, timeframe_stop, " 

"timeframe_all, monday, tuesday, Wednesday, thursday, friday, Saturday, Sunday, primary_email, 
primary_pager, primary_escalation_de!ay, " 

"secondary_email, secondary_pager, secondary_escalation_delay, tertiary__email t tertiary_pager, 

tertiary_escalation_delay, current_escalation, " 

"offset, daylight FROM rule, rulejnonitor, user, account, timezone " 

"WHERE rule.rulejd = rulejnonitor.rulejd " 

"AND rulejnonitor.monitorjd = %d " 

"AND ruie_monitor.monitor_server_id = %d " 

"AND rule.userjd = user.userjd " 

"AND rule.user_server_id = user.user_server_id " 

"AND account.accountjd = user.accountjd " 

"AND account.account_serverjd = user.account_server_id " 

"AND user, timezone jd = timezone. timezone jd " 

"AND user.timezone_serverjd = timezone.timezone_server_id " 

"AND ruie.active- on' ", mon->monitorjd, mon->monitor_serverjd); 

if(mysqLreal_query(mysql_connection, sql_query, n)!=0) { 
fprintf(stderr, "Failed while attempting to select rules: Error: %s\n", 
mysql_error(mysql_connection)); 
free(sql_query); 
return(1); 

} 

free(sql_query); 

//store results from query 
result=mysql_store_result(mysqLconnection); 

//loop through all the rows checking each rule as we go 
while(row=mysql_fetch_row( result)) { 

r 

* check if the rule currently applies (ie turned on for current day/time) 
7 

//copy the timestamp to a new variable and adjust it to the users local timezone 
struct tm *tm_struct; 

if(timezone_adjust(&tm_struct, atoi(row[23]), atoi(rcw[24]))) { 

//the timezone adjustment failed, don't evaluate this rule... 



#ifdef DEBUG 



#endif 



row[1]); 



continue; 

} 

if(test_date(tm_struct, row)) { 

MYSQL_RES *results_extra; 
MYSQL_ROW row_extra; 

//the rule is active for this polling period, perform checking 
flockfile(stdout); 

fprintf(stdout, "Evaluating Rule: %s t %s\n", row[0], row[1]); 
funlockfile(stdout); 

//check which type of rule it is and select any addtional type specific parameters 
if(strcmp(row[2], "static")==0) { 

assert(sql_query = malloc(IOOO)); 

n=snprintf(sql_query, 1000, "SELECT operator, value FROM rule_static " 
"WHERE rule_id=%s AND rule_serverjd=%s LIMIT 1", row[0], 



if(mysql_real_query(mysqLconnection, sql_query ( n)!=0) { 
fprintf(stderr, "Failed while attempting to select rule details: Error: 
%s\n", mysqLerror(mysql_connection)); 

free(sqLquery); 

continue; //continue on to the next rule maybe it will work 

} 

free(sql_query); 

//store results from query 
results_extra=mysql_store_result(mysqi_connection); 



row_extra[0], 



if(row_extra=mysql_fetch_row(results_extra)) { 
//call actual static evaluation here 
test_static_threshold(atoi(row(0]), atoi(row[1]), mon, 



atof(row_extra[1]), gauge); 
} 

mysql_free_result(results__extra); 
} else if(strcmp(row[2], M variable")==0) { 
assert(sql_query = malloc(IOOO)); 

n=snprintf(sql_query, 1000, "SELECT operator, value, time_value, 
time_unit, valuejype FROM rule_variable " 

"WHERE rule_id=%s AND rule_server_id=%s LIMIT 1", row[0], 

row[1J); 

if(mysqLreaLquery(mysql_connection, sqLquery, n)!=0) { 
fprintf(stderr, "Failed while attempting to select rule details: Error: %s\n", 
my sq l_error( my sq l_conn ection )); 

free(sql_query); 

continue; //continue on to the next rule maybe it will work 

} 

free(sql_query); 

//store results from query 
results_extra=mysql_store_result(mysql_connection); 

if(row_extra=mysql_fetch_row(results_extra)) { 
//call actual variable evaluation here 
test_variable_threshold(atoi(row(0]), atoi(row[1]), mon f 



row_extra[0], atof(row_extra[1]), gauge, row_extra[4], 

atoi(row_extra[2]), row_extra[3] t 

mysql_connection); 

} 

mysql_free_result(results_extra); 
} else if(strcmp(row[2] t "cumulative")==0) { 
assert(sql_query = malloc(IOOO)); 

n=snprintf(sql_query, 1000, "SELECT operator, value, time_value, 
time_unit, percent FROM rule_cumulative " 

"WHERE rulejd=%s AND rule_server_id=%s LIMIT 1", row[0], 

row[1]); 

if(mysql_real_query(mysql_connection, sql_query, n)!=0) { 
fprintf(stderr, "Failed while attempting to select rule details: Error: %s\n", 
mysql_error(mysqLconnection)); 

free(sql_query); 

continue; //continue on to the next rule maybe it will work 

} 

free(sql_query); 

//store results from query 
results__extra=mysqLstore_result(mysql_connection); 

if(row_extra=mysql_fetch_row(results_extra)) { 
//call actual cumulative evaluation here 

test_cumulative_threshold(atoi(row[0]), atoi(row[1]), mon, 
row_extra[0], atof(row_extra[1]), gauge, atoi(row_extra[4]), 

atoi(row_extra[2]), row_extra[3], mysql_connection); 

} 

mysqLfree_result(results_extra); 
} else if(strcmp(row[2], "stddev")==0) { 

assert(sql_query = malloc(IOOO)); 

n=snprintf(sql_query, 1000, "SELECT operator, value, time_value, 

time_unit FROM rule_stddev " 

"WHERE rulejd=%s AND rule_serverjd=%s LIMIT 1", row[0], 

row[1]); 

if(mysql_real_query(mysql_connection, sql_query, n)!=0) { 
fprintf(stderr, "Failed while attempting to select rule details: Error: %s\n", 
mysq l_e rror( my sq l_con nectio n )) ; 

free(sql_query); 

continue; //continue on to the next rule maybe it will work 

} 

free(sql_query); 

//store results from query 
results__extra=mysql_store_result(mysql_connection); 

if(row_extra=mysql_fetchjx>w(results_extra)) { 
//call actual stddev evaluation here 
test_stddev_threshold(atoi(row[0]), atoi(row[1]), mon, 

row_extra[0], atof(row_extra[1]), gauge, 

atoi(row_extra[2]), row_extra[3], mysqLconnection); 

} 

mysql_free_result(results_extra); 

} 

//(should actually go in individual functions) insert errorjog entries if needed... 



} 

} 

mysql_freej*esult(resu1t); 
return(O); 

} 

/** 

* Function to adjust timestamp to users timezone from UTC timestamp. 

* The function should return 0 for sucess 1 for failure. 
7 

int timezone_adjust(struct tm **tm_struct, int timezone_offset, int daylight) { 
if(daylight) { 

timezone_offset += 1; 

} 

if(timezone_pffset!=0) { 

//set user time in seconds since since 1970 to match polling period start time... 
timej utime = rawtime; 

//adjust value based off timezone info... +/- n hours 
utime += timezone_offset*3600; 
//get tm structure from utime value 
*tm_struct = loca!time(&utime); 

} 

return(O); 

} 

r 

* Function to test whether a rule (as represented by a MYSQL_ROW object) is active for the specified 

* tm structure (which should be the polling time in the users timezone). Function returns 1 if the rule 

* passed the check, 0 otherwise. 
7 

int test_date(struct tm *tm_struct, MYSQL_ROW row) { 

int dotw_passed = 0; 
#ifdef DEBUG 

flockfile(stdout); 

fprintf(stdout, "UserTime: Day of Week: %d, Hour: %d\n", tm_struct->tm_wday, tm_struct- 
>tm__hour); 

funlockfile(stdout); 

#endif 

//check what day it is, and whether the bit for that day is set in the rule we are checking 
if(tm_struct->tm_wday==0) { 

if(strcmp(row[1 2], M on")==0) { 
dotw_passed = 1 ; 

} 

} else if(tm_struct->tm_wday==1) { 
if(strcmp(row[6], "on")==0) { 
dotw_passed = 1 ; 

} 

} else if(tm_struct->tm_wday==2) { 
if(strcmp(row[7], M on")==0) { 
dotw_passed - 1 ; 

} 

} else if(tm_struct->tm_wday==3) { 
if(strcmp(row[8], M on")==0) { 
dotw_passed - 1 ; 
} 

} else if(tm_struct->tm_wday==4) { 



if(strcmp(row[9], "on")==0) { 
dotw_passed = 1 ; 

} 

} else i f ( t m_stru ct-> tm_wd ay = = 5 ) { 
if(strcmp(row[10], "on M )==0) { 
dotw_passed = 1 ; 

} 

} else if(tm_struct->tm_wday==6) { 
if(strcmp(row[11] l M on M )==0){ 
dotw_passed = 1 ; 

} 

} 

if(dotw_passed==1 ) { 

//check if the rule is set for timeframe_all, if so check has passed 
if(strcmp(row[5], "on")==0) 
return 1; 

if(tm_struct->tm_hour >= atoi(row[3]) && tm_struct->tm_hour < atoi(row[4])) 
return 1 ; 

else 

return 0; 

} else { 

return 0; 

} 

} 

/**
 * Function to convert time_value from rules into seconds based off time_unit.
 *
 * @param time_value number of units
 * @param time_unit  "hours" or "days"; any other text (or NULL) means time_value is
 *                   already expressed in seconds
 * @return time_value converted to seconds
 */
unsigned long int time_to_seconds(int time_value, char *time_unit) {
    unsigned long int seconds_per_unit = 1;

    if (time_unit != NULL) {
        if (strcmp(time_unit, "hours") == 0)
            seconds_per_unit = 3600UL;
        else if (strcmp(time_unit, "days") == 0)
            seconds_per_unit = 86400UL;
    }

    //widen before multiplying so large values do not overflow int arithmetic
    return (unsigned long int)time_value * seconds_per_unit;
}

/**
 * Function to format a value to appropriate unit increment. It is assumed that the value coming in
 * is already in the base unit (ie bps, or packets, not kbps or (thou)packets).
 *
 * Scaling rules:
 *   - unit "available": value and unit are passed through unchanged;
 *   - units "bits" and "bps": steps of 1024 with prefixes k, m, g, t;
 *   - unit "B": steps of 1024 with prefixes K, M, G, T;
 *   - anything else: steps of 1000 with prefixes "(thou) ", "(mil) ", "(bil) ", "(tril) ".
 * The largest step whose scaled value is still >= 1 is chosen; values below the first
 * step (including negative values) keep the base unit.
 *
 * @param adjusted_unit output buffer supplied by the caller; nothing is allocated here.
 *                      It must hold at least strlen(old_unit) + 8 bytes (longest prefix
 *                      is 7 characters plus the terminator).
 * @param old_value     value in the base unit
 * @param old_unit      name of the base unit
 * @return old_value scaled to the unit written into adjusted_unit
 */
long double format_value(char *adjusted_unit, long double old_value, char *old_unit)
{
    static const char *const lower_prefixes[4] = { "k", "m", "g", "t" };
    static const char *const upper_prefixes[4] = { "K", "M", "G", "T" };
    static const char *const decimal_prefixes[4] = { "(thou) ", "(mil) ", "(bil) ", "(tril) " };

    const char *const *prefixes;
    long double step;
    long double scale = 1;
    int tier = 0; /* 0 = base unit, 1..4 = index + 1 into prefixes */

    if (strcmp(old_unit, "available") == 0) {
        sprintf(adjusted_unit, "available");
        return old_value;
    }

    if (strcmp(old_unit, "bits") == 0 || strcmp(old_unit, "bps") == 0) {
        prefixes = lower_prefixes;
        step = 1024;
    } else if (strcmp(old_unit, "B") == 0) {
        prefixes = upper_prefixes;
        step = 1024;
    } else {
        prefixes = decimal_prefixes;
        step = 1000;
    }

    //climb while the value still reaches the next step; the top tier is open ended
    while (tier < 4 && old_value >= scale * step) {
        scale *= step;
        tier++;
    }

    if (tier == 0) {
        sprintf(adjusted_unit, "%s", old_unit);
    } else {
        sprintf(adjusted_unit, "%s%s", prefixes[tier - 1], old_unit);
    }

    return old_value / scale;
}

r 

* Function to test whether a static rule is violated. 
*/ 

void test_static_threshold(unsigned int rulejd, unsigned int rule_server_id, monitor *mon, 

char *operator, long double value, long double poll_value) 

{ 

#ifdef DEBUG 

flockfile(stdout); 

fprintf(stdout, "Static Rule Value: %Lf Polled Value: %Lf Divisor: %f\n", value, polij/alue, mon- 
>divisor); 

funlockfile(stdout); 

#endif 

//setup the error node 
errorjiode *errnode; 

assert(errnode = malloc(sizeof(*errnode))); 

errnode-> rulejd = rulejd; 
errnode->rule_serverJd = rule_server_id; 
errnode->monitor_id = mon->monitor_id; 
errnode->monitor_serverjd = mon->monitor_serverjd; 

//make sure we aren't going to be dividing by zero if someone stupidly put a 0 in for a metric in the 

db! 

if(mon->divisor == 0) 

mon->divisor = 1; 
poll_value /= mon->divisor; 

char value_unit_adjusted[strlen(mon->unit)+10]; 
long double value_adjusted; 

value_adjusted = format_value(value_unit_adjusted, value, mon->unit); 

char poll j/alue jjnit_adjusted[strien(mon->unit)+1 0]; 
long double poll_value_adjusted; 

poll_value_adjusted = format_value(pollj/aluejjnit_adjusted, poll_value, mon->unit); 

if(strcmp(operator, "falls below")==0 && poll_value < value) { 
errnode->failed = 1; 

snprintf(errnode->message, sizeof(errnode->message), 

"Falls below static threshold of %1 ,2Lf %s (Value: %1 .2Lf %s)", 
value_adjusted, valuejjnit_adjusted, poll_value_adjusted, 

poll_value_unit_adjusted); 

} else if(strcmp(operator f "exceeds")==0 && poll_value > value) { 
errnode->failed = 1 ; 
snprintf(errnode->message, sizeof(errnode->message), 

"Exceeds static threshold of %1 .2Lf %s (Value: %1 .2Lf %s)", 
value_adjusted, value_unit_adjusted, po!lj/alue_adjusted, 

poII_value_unit_adjusted); 

} else if(strcmp(operator, "equals")==0 && poII_va!ue == value) { 
errnode->failed = 1 ; 



snprintf(errnode->message, sizeof(errnode->message), 

"Equals static threshold of %1 .2Lf %s (Value: %12Lf %s)", 

value_adjusted, value_unit_adjusted, polLvalue_adjusted, 

poll_valuejjnit_adjusted); 

} else if(strcmp(operator, "unavailable")==0 && poll_value < 1) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Fails static availability threshold: 
Device was unavailable."); 
} else { 

//rule is not violated 
errnode->failed = 0; 
if(strcmp(operator, , 'exceeds")==0) 
snprintfferrnode^message, sizeof(errnode->message), 
"Does not exceed static threshold of %1.2Lf %s (Value: %1 .2Lf %s)", 
value_adjusted, value_unit_adjusted, poll_value_adjusted, 

poll_valuejjnit_adjusted); 

else if(strcmp(operator, "falls below")==0) 

snprintf(errnode->message 1 sizeof(errnode->message) l 
"Does not fall below static threshold of %1 .2Lf %s (Value: %12Lf %s)", 
value_adjusted, value_unit_adjusted, polLvalue_adjusted, 

poll_value_unit_adjusted); 

else if(strcmp(operator, "equals")==0) 

snprintf(errnode->message 1 sizeof(errnode->message), 
"Does not equal static threshold of %1.2Lf %s (Value: %1.2Lf %s)", 

value_adjusted, value_unit_adjusted t poll_value_adjusted, 

poll_va I uejj n it_adj u sted ) ; 

else if(strcmp(operator, "unavailable")==0) 

snprintf(errnode->message t sizeof(errnode->message), "Passes static availability 
threshold: Device was available."); 

else 

snprintf(errnode->message, sizeof(errnode->message), ""); 

} 

//push error_node 

pthread_mutex_lock(&error_work_queue.mutex); 
queue_put(&error_work_queue.c_queue, (queue_node *)errnode); 
pthread_mutex_unlock(&error_work_queue.mutex); 
pthread^cond^roadcas^&error^work^queue.cond); 

return; 

} 

r 

* Function to test whether a variable rule is violated. 
*/ 

void test_variable_threshold(int rulejd, int rule_server_id i monitor *mon t 

char Operator, long double value, long double poll_vaiue, char 

*value_type, 

int time_value, char *time_unit, MYSQL *mysql_connection) { 
//make sure we don't divide by zero because someones put a bad value in the metrics table! 
if(mon->divisor==0) 

mon->divisor = 1 ; 

#ifdef DEBUG 

flockfile(stdout); 

fprintf(stdout, "Variable Rule Value: %Lf Polled Value: %Lf Divisor: %f Time Value: %d Time Unit: 
%s\n" t 



value, poll_value, mon->divisor, time_value, time_unit); 

funlockfile(stdout); 
#endif 

//setup the error node 
error_node *errnode; 

assert(errnode = malloc(sizeof(*errnode))); 
errnode->rule_id = rulejd; 
errnode->rule_server_id = rule_server_id; 
errnode->monitor_id = mon->monitor_id; 

errnode->monitor_serverjd = mon->monitor_serverJd; 

errnode->failed = 0; 

//evaluate the rule and generate the errnode->failed and errnode->message values 
char *sql_query; 
int n; 

MYSQL_RES 'result; 
MYSQL_ROW row; 

unsigned long int seconds; 
long double average; 

seconds = time_to_seconds(time_value, time_unit); 
assert(sql_query = malloc(800)); 

n=snprintf(sql_query, 800, "SELECT AVG(gauge) AS gauge FROM eventjog WHERE " 
"monitor_id=%d AND monitor_server_id=%d AND timestamp >= 
DATE_SUB(NOW(), INTERVAL %d SECOND) " 

"AND timestamp <= NOW() GROUP BY monitoMd", mon->monitorjd, 
mon->monitor__serverjd, seconds); 

if(mysql_reaLquery(mysql_connection, sql_query, n)!=0) { 
flockfile(stderr); 

fprintf(stderr, "Failed while attempting to select AVG for variable rule... aborting rule evaluation: 
Error: %s\n", mysql_error(mysql_connection)); 
funlockfile(stderr); 

free(errnode); 

free(sqLquery); 

return; 

} 

free(sql_query); 

//store results from last query into result 
result=mysql_store_result(mysql_connection); 

row=mysqLfetch_row(result); 
jf(row==NULL) { 
flockfile(stderr); 

fphntf(stderr, "Couldn't fetch average for variable rule, aborting rule evaluation... \n"); 
funlockfile(stderr); 

free(errnode); 
mysql_free_result(result); 

return; 

} 

average = atof(row[0]); 
mysql_free_result(result); 



if(strcmp(va!ue__type, "unir)==0) { 
#ifdef DEBUG 



flockfile(stdout); 

fprintf(stdout, "Testing Var: Avg: %Lf Polled: %Lf\n", average, poll_value); 
funlockfile(stdout); 

#endif 

long double test_value; . 

//figure out the delta from the average over the period 
test_value = (poll_value - average) / mon->divisor; 

char value_unit_adjusted[strlen(mon->unit)+10]; 
long double value_adjusted; 
value_adjusted = format_value(value_unit_adjusted, value, mon->unit); 

//char test_value_unit_adjusted[strlen(mon->unit)+10]; 
//long double test_value_adjusted; 

//test_value_adjusted = format_value(test_valuejjnit_adjusted, test_value, mon->unit); 

if(strcmp(operator, "increases")==0 && test_yalue > value) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Increased above 

variable threshold of " 

"%1.2Lf %s in %d %s", value_adjusted, value_unit_adjusted, time_value, 

time_unit); 

} else if(strcmp(operator, "decreases")==0 && test_value < (-1*value)) { 
errnode->fai1ed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Descreased below 

variable threshold of " 

"%1.2Lf %s in %d %s", value__adjusted, value_unit_adjusted, time_value, 

time__unit); 

} else { 

errnode->failed = 0; 
if(strcrnp(operator, M increases")==0) { 

snprintf(errnode->message, sizeof(errnode->message), "Passes variable 

threshold: Did not increase " 

"%1.2Lf %s in %d %s", value_adjusted, value_unit_adjusted, 

time_value, time_unit); 

} else if(strcmp(operator, "decreases")==0) { 

snprintf(errnode->message t sizeof(errnode->message), "Passes variable 

threshold: Did not decrease " 

"%1.2Lf %s in %d %s", value_adjusted, value_unit_adjusted, 

time_value, time_unit); 

} 

} 

} else if(strcmp(value_type, "percent")==0) { 
double percent; 

//figure out the percentage change from the average over the period 
if(average==0) { 

percent = 0; 

} else { 

percent = (poll_value - average) / average * 100; 

} 

#ifdef DEBUG 

flockfile(stdout); 

fprintf(stdout, "Testing Var: Avg: %Lf PollVal: %Lf PctChange: %f ThresholdPct: %Lf\n", 
average, poll_value, percent, value); 

funlockfile(stdout); 

#endif 



if(strcmp(operator, "increases")==0 && percent >= value) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Increased by more than 
the variable threshold of %1.2Lf%% in %d %s'\ 

value, time_value, time_unit); 
} else if(strcmp(operator, "decreases")==0 && (-1*percent) >= value) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Decreased by more 
than the variable threshold of %1 .2Lf%% in %d %s", 

value, time_value, timejjnit); 

} else { 

errnode->failed = 0; 
if(strcmp(operator, M increases")==0) { 

snprintf(errnode->message, sizeof(errnode->message), "Passes variable 

threshold: Did not increase " 

"by more than %1.2Lf%% in %d %s", value, time_value, 

timejjnit); 

} else if(strcmp(operator, "decreases")==0) { 

snprintf(errnode->message, sizeof(errnode->message), "Passes variable 

threshold: Did not decrease " 

"by more than %12Lf%% in %d %s", value, time_value, 

timejjnit); 

} 

} 

} 

//push error_node 
pthread_mutex_lock(&error_work_queue.mutex); 
queue__put(&error_work_queue.c_queue, (queue_node *)errnode); 

pthread_mutex_unlock(&error_work_queue.mutex); 

pthread_cond_broadcast(&error_work_queue.cond); 

return; 

} 

r 

* Function to test whether a cumulative rule is violated. 
•/ 

void test_cumulative_threshold(int rulejd, int rule_server_id, monitor *mon, 

char *operator, long double value, long double poll_value, int percent, 
int time_value, char *time_unit, MYSQL *mysqLconnection) { 
//make sure we don't divide by zero because someones put a bad value in the metrics table! 
if(mon->divisor==0) 
mon->divisor = 1; 
#ifdef DEBUG 

flockfile(stdout); 

fprintf(stdout, "Cumulative Rule Value: %Lf Polled Value: %Lf Percent: %d Divisor: %f Time Value: 
%d Time Unit: %s\n", 

value, poll_value, percent, mon->divisor, time_value, time_unit); 

funlockfile(stdout); 
#endif 

//setup the error node 

error_node *errnode; 

assert(ermode = malloc(sizeof(*errnode))); 
errnode->rulejd = rulejd; 
errnode>>rule_server_id = rule_server_id; 



errnode->monitorjd = mon->monitorjd; 
errnode->monitor_server_id = mon->monitor_serverjd; 
errnode->failed = 0; 

//evaluate the rule and generate the errnode->failed and errnode->message values 
char *sql_query; 
int n; 

MYSQL_RES 'result; 
MYSQL_ROW row; 

unsigned long int seconds; 
int violation_count; 
float percent_violated; 
char symbol; 

seconds = time_to_seconds(time_value, timejjnit); 

//convert availability rules into normal ones... 
if(strcmp(operator, "available")==0) { 
value = 0; 

} else if(strcmp(operator, "unavailable")==0) { 
value = 1; 

} 

if(strcmp(operator, "exceeds")==0 || strcmp(operator, "available")==0) { 
symbol = ">'; 

} else if(strcmp(operator, "falls below")==0 || strcmp(operator, M unavailable")==0) { 
symbol - •<*; 

} else { 

symbol = '-; 

} 

assert(sql_query = malloc(800)); 

n=snprintf(sqLquery t 800, "SELECT COUNTf) AS violation^count FROM eventjog WHERE " 

"monitorjd=%d AND monitor_serverjd=%d AND timestamp >= DATE_SUB(NOW(), 
INTERVAL %d SECOND) ** 

"AND timestamp <= NOW() AND gauge %c %Lf, mon->monitorjd, mon- 
>monitor__serverjd, seconds, symbol, value); 

if(mysql_reaLquery(mysql_connection, sql_query, n)!=0) { 
flockfile(stderr); 

fprintf(stderr, "Failed while attempting to select violation count for cumulative rule ... 
aborting rule evaluation: Error: %s\n", mysql_error(mysqLconnection)); 
funlockfile(stderr); 
free(errnode); 
free(sql_query); 
return; 

} 

free(sqLquery); 

//store results from last query into result 
result=mysqLstore_result(mysqLconnection); 
row=mysql_fetch_row( result); 
if(row==NULL) { 
flockfile(stderr); 

fprintf(stderr, "Couldn't fetch violation count for cumulative rule, aborting rule evaluation. .An"); 

funlockfile(stderr); 

free(errnode); 



mysql_free_result(result); 
return; 

} 

vio!ation_count = atoi(row[0]); 
mysqljree_resu1t(result); 

percent_violated = (float)violation_count / (seconds / 300) * 100; 

//flockfile(stdout); 

//fprintf(stdout, "vio count: %d percent vio: %ftn", violation_count, percent_violated); 
//funlockfile(stdout); 

char value jjnit_adjusted[strlen(mon->unit)+ 10]; 
long double value_adjusted; 

value_adjusted = format_value(value_unit_adjusted, value, mon->unit); 

if(strcmp(operator, "falls below n )==0 && percent_violated >= percent) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Fails cumulative threshold: Fell 
below %1.2Lf %s%1.2f" 

"percent of the time over last %d %s", value_adjusted, value_unit_adjusted, 
percent_violated, time_value, time_unit); 

} else if(strcmp(operator, "exceeds")==0 && percent_violated >= percent) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message) t "Fails cumulative threshold: 
Exceeded %1.2Lf %s %1 .2f " 

"percent of the time over last %d %s", value_adjusted, value_unit_adjusted, 
percenM/iolated, time_value, time_unit); 

} else if(strcmp(operator, "falls be!1ow")==0) { 
errnode->failed = 0; 

snprintf(errnode->message, sizeof(errnode->message), "Passes cumulative threshold: 
Did not fall below %1 .2Lf %s %d.00 " 

"percent of the time over last %d %s", value_adjusted, value_unit_adjusted, 
percent, time_value, time__unit); 

} else if(strcmp(operator, "exceeds")==0) { 
errnode->failed = 0; 

snprintf(errnode->message, sizeof(errnode->message), "Passes cumulative threshold: 
Did not exceed %1.2Lf %s %d.00 " 

"percent of the time over last %d %s", value_adjusted, value_unit_adjusted, 
percent, time_value, time_unit); 

} else if(strcmp(operator, "available")==0 && percent_violated >= percent) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Fails cumulative availability 
threshold: Available %1 .2f " 

"percent of the time over last %d %s", percent_violated, time_value, time_unit); 
} else if(strcmp(operator t "unavailable")==0 && percent_violated >= percent) { 
errnode->failed = 1; 

snprintf(errnode->message, sizeof(errnode->message), "Fails cumulative availability threshold: 
Unavailable %1.2f" 

"percent of the time over last %d %s" f percent_violated, time_value, time_unit); 
} else if(strcmp(operator, "available")==0) { 
errnode->failed = 0; 

snprintf(errnode->message, sizeof(errnode->message), "Passes cumulative availability 
threshold: Available %1 .2f " 

"percent of the time over last %d %s", percent_violated, time_value f time_unit); 



} else if(strcmp(operator, "unavailable")==0) { 
errnode->failed = 0; 

snprintf(errnode->message, sizeof(errnode->message), "Passes cumulative availability 
threshold: Unavailable %1 .2f " 

"percent of the time over last %d %s", percent_violated, time_value, time_unit); 

} 

//push error_node 
pthread_mutexJock(&error_work_queue.mutex); 
queue_put(&error_work_queue.c_queue, (queuejiode *)errnode); 

pthread_mutex_unlock(&error_work_queue.mutex); 

pthread_cond_broadcast(&error_work_queuexond); 

return; 

} 

r 

* Function to test whether a stddev rule is violated. 
V 

void test_stddev_threshold(int rulejd, int rule_server_id t monitor *mon, 

char 'operator, long double value, long double poll_value, int timej/alue, 
char *timejjnit, MYSQL *mysqLconnection) { 
//make sure we don't divide by zero because someones put a bad value in the metrics table! 
if(mon->divisor==0) 
mon->divisor = 1; 
#ifdef DEBUG 

flockfile(stdout); 

fprintf(stdout, "StdDev Rule Value: %Lf Polled Value: %Lf Divisor: %f Time Value: %d Time Unit: 
%s\n", 

value, pollj/alue, mon->divisor, timej/alue, timejinit); 
funlockfile(stdout); 
#endif 

//setup the error node 
errorjiode *errnode; 

assert(errnode = malloc(sizeof(*errnode))); 
errnode->rule_id = rulejd; 
errnode->rule_server_id - rule_server_id; 
errnode->monitorjd = mon->monitor_id; 
errnode->monitor_serverjd = mon->monitor_server_id; 
errnode->failed = 0; 

//evaluate the rule and generate the errnode->failed and errnode->message values 
char *sql_query; 
int n; 

MYSQL_RES 'result; 
MYSQL_ROW row; 



long double stddev_value; 
unsigned long int seconds; 
seconds = time_to_seconds(time_value, timejjnit); 



assert(sql_query = malloc(800)); 
n=snprintf(sql_query, 800, "SELECT AVG(gauge) AS mean, STDDEV(gauge) AS stddev FROM 
eventjog " 

"WHERE monitorjd=%d AND monitor_server_id=%d AND timestamp >= 
DATE_SUB{NOW(), INTERVAL %d SECOND) " 

"AND timestamp <= NOW() GROUP BY monitor jd", mon->monitor_id, 



mon->monitor_serverjd, seconds); 

if(mysqlj*eal_query(mysql_connection, sql_query, n)!=0) { 
fiockfile(stderr); 

fprintf(stderr, "Failed while attempting to select mean/stddev for stdev rule ... aborting rule 
evaluation: Error: %s\n", mysql_error(mysql_connection)); 
funlockfile(stderr); 
free(errnode); 
free(sql_query); 
return; 

} 

free(sql_query); 

//store results from last query into result 
result=mysqLstore__result(mysql_connection); 
row= mysqlj etchjrow( result) ; 
if(row==NULL) { 
flockfile(stderr); 

fprintf(stderr, "Couldn't fetch violation count for cumulative rule, aborting rule evaluation. .An"); 

funlockfile(stderr); 

free(errnode); 

mysqLfree_result(result); 

return; 

} 

if(strcmp(operator, M exceeds")==0) 
stddev_value = atof(row[0])+(value*atof(row[1])); 
else 

stddev_vaiue = atof(row[0])-(value*atof(row[1])); 
mysql_free_result(result); 

char stddev_valuejjnit_adjusted[strlen(mon->unit)+10]; 
long double stddev_value_adjusted; 

stddev_value_adjusted = format_value(stddev_value_unit_adjusted, stddev_value/mon->divisor, 
mon->unit); 

char poll_value_unit_adjusted[strlen(mon->unit)+10]; 
long double poll_value_adjusted; 

poll_value_adjusted = format_value(poll_value_unit_adjusted, poll_value/mon->divisor, mon->unit); 



if(strcmp(operator, "exceeds M )==0 && poll_value > stddev_value) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Fails standard deviation 
threshold: %1.2Lf%s " 

"exceeds mean + %1.2Lf stddev (%1.2Lf %s) as taken over %d %s", polLvalue_adjusted, 
poll_value_unit__adjusted, 

value, stddev_value_adjusted, stddev_value_unit_adjusted, time_value, 

time_unit); 

} else if(strcmp(operator, "falls below")==0 && poll_value < stddev_value) { 
errnode->failed = 1 ; 

snprintf(errnode->message, sizeof(errnode->message), "Fails standard deviation 
threshold: %1.2Lf %s" 

"falls below mean - %1 .2Lf stddev (%1 2Lf %s) as taken over %d %s", 
poll_vaiue_adjusted t poll_value_unit_adjusted, 



value, stddev_value_adjusted, stddev_value_unit_adjusted, time_value, 

timejjnit); 

} else if(strcmp(operator, "exceeds")==0) { 
errnode->failed = 0; 

snprintf(errnode->message, sizeof(errnode-> message), "Passes standard deviation 
threshold: %1.2Lf %s M 

"does not exceed mean + %1.2Lf stddev (%1.2Lf %s) as taken over %d %s", 
poll_value_adjusted, poll_value_unit_adjusted, 

value, stddev_value_adjusted, stddev_value_unit_adjusted, time_value, 

timejjnit); 

} else if(strcmp(operator, "falls below")==0) { 
errnode->failed = 0; 

snprintf(errnode->message, sizeof(errnode->message), "Passes standard deviation 
threshold: %1.2Lf %s" 

"does not fail below mean - %1.2Lf stddev (%1.2Lf %s) as taken over %d %s", 
poll_value_adjusted, poll_value_unit_adjusted, 

value, stddev_value_adjusted, stddev_value_unit_adjusted, time_value, 

time__unit); 
} 

//push error_node 
pthread_mutex_lock(&error_work_queue.mutex); 
queue_put(&error_work_queue.c_queue, (queuejiode *)errnode); 

pthread_mutex_unlock(&error_work_queue.mutex); 

pthread_cond_broadcast(&error_work_queue.cond); 

return; 

} 



